In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
In [3]:
# Read the life-expectancy dataset from the working directory and preview its first five rows.
LifeExpectancy = pd.read_csv(filepath_or_buffer="Life Expectancy.csv")
LifeExpectancy.head(n=5)
Out[3]:
Country Region Year Infant_deaths Under_five_deaths Adult_mortality Alcohol_consumption Hepatitis_B Measles BMI ... Diphtheria Incidents_HIV GDP_per_capita Population_mln Thinness_ten_nineteen_years Thinness_five_nine_years Schooling Economy_status_Developed Economy_status_Developing Life_expectancy
0 Turkiye Middle East 2015 11.1 13.0 105.8240 1.32 97 65 27.8 ... 97 0.08 11006 78.53 4.9 4.8 7.8 0 1 76.5
1 Spain European Union 2015 2.7 3.3 57.9025 10.35 97 94 26.0 ... 97 0.09 25742 46.44 0.6 0.5 9.7 1 0 82.8
2 India Asia 2007 51.5 67.9 201.0765 1.57 60 35 21.2 ... 64 0.13 1076 1183.21 27.1 28.0 5.0 0 1 65.4
3 Guyana South America 2006 32.8 40.5 222.1965 5.68 93 74 25.3 ... 93 0.79 4146 0.75 5.7 5.5 7.9 0 1 67.0
4 Israel Middle East 2012 3.4 4.3 57.9510 2.89 97 89 27.0 ... 94 0.08 33995 7.91 1.2 1.1 12.8 1 0 81.7

5 rows × 21 columns

In [5]:
# Show every column label so later cells can reference the exact spellings.
LifeExpectancy.columns
Out[5]:
Index(['Country', 'Region', 'Year', 'Infant_deaths', 'Under_five_deaths',
       'Adult_mortality', 'Alcohol_consumption', 'Hepatitis_B', 'Measles',
       'BMI', 'Polio', 'Diphtheria', 'Incidents_HIV', 'GDP_per_capita',
       'Population_mln', 'Thinness_ten_nineteen_years',
       'Thinness_five_nine_years', 'Schooling', 'Economy_status_Developed',
       'Economy_status_Developing', 'Life_expectancy'],
      dtype='object')

Average Life Expectancy in Different Regions of the World¶

In [8]:
# Distinct region labels; evaluated mid-cell, so the result is computed but not displayed.
LifeExpectancy["Region"].unique()

# Row subsets for the four regions whose yearly averages are compared below.
NorthAmerica = LifeExpectancy.loc[LifeExpectancy["Region"].eq("North America")]
EuropeanUnion = LifeExpectancy.loc[LifeExpectancy["Region"].eq("European Union")]
Asiaa = LifeExpectancy.loc[LifeExpectancy["Region"].eq("Asia")]
Africaa = LifeExpectancy.loc[LifeExpectancy["Region"].eq("Africa")]
In [10]:
# Yearly mean life expectancy per region, each as a two-column frame (Year, Life_expectancy).
NA = NorthAmerica.groupby("Year", as_index=False)["Life_expectancy"].mean()
EU = EuropeanUnion.groupby("Year", as_index=False)["Life_expectancy"].mean()
AS = Asiaa.groupby("Year", as_index=False)["Life_expectancy"].mean()
AF = Africaa.groupby("Year", as_index=False)["Life_expectancy"].mean()
# The all-countries average keeps Year as its index rather than as a column.
AVG_LE = LifeExpectancy.groupby("Year")[["Life_expectancy"]].mean()

# Shared x axis taken from North America's years; every curve is drawn against it,
# which assumes all series cover the same years in the same order.
x = NA["Year"]
y_1 = NA["Life_expectancy"]
y_2 = EU["Life_expectancy"]
y_3 = AS["Life_expectancy"]
y_4 = AF["Life_expectancy"]
y_5 = AVG_LE["Life_expectancy"]

# One line per series, drawn in a fixed order so the colour cycle stays stable.
for _series, _label in (
    (y_1, "North America"),
    (y_2, "EU"),
    (y_3, "Asia"),
    (y_4, "Africa"),
    (y_5, "World Average"),
):
    plt.plot(x, _series, label=_label)

plt.title("Average Life Expectancy in Different Regions from 2000-2015 ")
plt.xlabel("Years")
plt.ylabel("Life Expectancy")
plt.legend(loc="lower right", fontsize="8")
plt.show()

The Relationship Between Adult Mortality and Life Expectancy¶

In [13]:
# Scatter of life expectancy against adult mortality, labelled through the Axes object.
ax = LifeExpectancy.plot.scatter(x="Adult_mortality", y="Life_expectancy")
ax.set_xlabel("Adult Mortality")
ax.set_ylabel("Life Expectancy")
ax.set_title("Relationship Between Life Expectancy and Adult Mortality ")
Out[13]:
Text(0.5, 1.0, 'Relationship Between Life Expectancy and Adult Mortality ')
In [15]:
# Region of the row(s) with the highest adult mortality in the dataset.
LifeExpectancy.loc[
    LifeExpectancy["Adult_mortality"] == LifeExpectancy["Adult_mortality"].max(),
    "Region",
]
Out[15]:
2515    Africa
Name: Region, dtype: object
In [17]:
# Mean life expectancy over all Africa rows (every country and year).
LifeExpectancy.loc[LifeExpectancy["Region"] == "Africa", "Life_expectancy"].mean()
Out[17]:
57.84730392156862
In [19]:
# Region of the row(s) with the lowest adult mortality in the dataset.
LifeExpectancy.loc[
    LifeExpectancy["Adult_mortality"] == LifeExpectancy["Adult_mortality"].min(),
    "Region",
]
Out[19]:
1613    Rest of Europe
Name: Region, dtype: object
In [21]:
# Mean life expectancy over all "Rest of Europe" rows (every country and year).
LifeExpectancy.loc[LifeExpectancy["Region"] == "Rest of Europe", "Life_expectancy"].mean()
Out[21]:
74.52541666666667
In [23]:
# Region of the row(s) with the lowest adult mortality.
# NOTE(review): this repeats the minimum-adult-mortality lookup already run
# earlier in the notebook — confirm whether the cell is still needed.
LifeExpectancy.loc[
    LifeExpectancy["Adult_mortality"] == LifeExpectancy["Adult_mortality"].min(),
    "Region",
]
Out[23]:
1613    Rest of Europe
Name: Region, dtype: object

Mean Life Expectancy in Different Regions of the World¶

In [26]:
# Bar chart of mean life expectancy per region, averaged over all rows of each region.
LifeExpectancy.groupby("Region")[["Life_expectancy"]].mean().plot.bar()

plt.title("Mean Life Expectancy in Different Regions of the World")
plt.xlabel("Regions of the World")
plt.ylabel("Life Expectancy")
plt.show()

The Relationship Between GDP per Capita and Life Expectancy¶

In [29]:
# Simple OLS with life expectancy as the response and GDP per capita as the predictor.
GDP = LifeExpectancy["GDP_per_capita"]
LifeExp = LifeExpectancy["Life_expectancy"]
# Despite its name, `Constant` is the whole design matrix: the GDP column plus an intercept column.
Constant = sm.add_constant(GDP)
sm.OLS(LifeExp, Constant).fit().summary()
# Fitted line per the summary table: Life_expectancy = 65.1186 + 0.0003 * GDP_per_capita
# (the slope is printed rounded to one significant digit).
# NOTE(review): "GDP = -6.075E04 + 1049.8525 * LifeExp" looks like the reverse regression
# (GDP on life expectancy), which this cell does not run — confirm or remove.
Out[29]:
OLS Regression Results
Dep. Variable: Life_expectancy R-squared: 0.340
Model: OLS Adj. R-squared: 0.340
Method: Least Squares F-statistic: 1474.
Date: Sat, 27 Jul 2024 Prob (F-statistic): 1.52e-260
Time: 20:54:40 Log-Likelihood: -9887.4
No. Observations: 2864 AIC: 1.978e+04
Df Residuals: 2862 BIC: 1.979e+04
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 65.1186 0.173 376.787 0.000 64.780 65.457
GDP_per_capita 0.0003 8.43e-06 38.397 0.000 0.000 0.000
Omnibus: 323.832 Durbin-Watson: 2.005
Prob(Omnibus): 0.000 Jarque-Bera (JB): 443.462
Skew: -0.961 Prob(JB): 5.05e-97
Kurtosis: 3.158 Cond. No. 2.48e+04


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.48e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
In [31]:
# Scatter of life expectancy against GDP per capita with the fitted OLS line on top.
GDP1 = LifeExpectancy["GDP_per_capita"]
LifeExp1 = LifeExpectancy["Life_expectancy"]

# Read the coefficients from the fit itself instead of copying them from the
# summary table: the table prints the slope as 0.0003 (one significant digit),
# and that rounding error is multiplied by GDP values in the tens of thousands,
# which visibly displaces the right-hand end of the line.
gdp_params = sm.OLS(LifeExp1, sm.add_constant(GDP1)).fit().params

# Evaluate the line across the observed GDP range.
X = np.linspace(GDP1.min(), GDP1.max())
Y = gdp_params["const"] + gdp_params["GDP_per_capita"] * X

plt.scatter(GDP1, LifeExp1, alpha=0.3)
plt.plot(X, Y, "r")
plt.ylabel("Life Expectancy")
plt.xlabel("GDP per Capita")
plt.title("Relationship Between Life Expectancy and GDP per Capita")
plt.show()

The Relationship Between Schooling and Life Expectancy¶

In [34]:
# Simple OLS with life expectancy as the response and years of schooling as the predictor.
Life1 = LifeExpectancy["Life_expectancy"]
School1 = LifeExpectancy["Schooling"]
# Design matrix: the schooling column plus an intercept column.
Const = sm.add_constant(School1)
sm.OLS(endog=Life1, exog=Const).fit().summary()
# Fitted line: Life = 52.2771 + 2.1723 * School
Out[34]:
OLS Regression Results
Dep. Variable: Life_expectancy R-squared: 0.537
Model: OLS Adj. R-squared: 0.536
Method: Least Squares F-statistic: 3313.
Date: Sat, 27 Jul 2024 Prob (F-statistic): 0.00
Time: 20:54:44 Log-Likelihood: -9381.2
No. Observations: 2864 AIC: 1.877e+04
Df Residuals: 2862 BIC: 1.878e+04
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 52.2771 0.312 167.609 0.000 51.666 52.889
Schooling 2.1723 0.038 57.560 0.000 2.098 2.246
Omnibus: 258.863 Durbin-Watson: 1.985
Prob(Omnibus): 0.000 Jarque-Bera (JB): 333.670
Skew: -0.786 Prob(JB): 3.50e-73
Kurtosis: 3.570 Cond. No. 21.8


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [36]:
# Scatter of schooling against life expectancy with the regression line on top.
Life1 = LifeExpectancy["Life_expectancy"]
School1 = LifeExpectancy["Schooling"]

# Line evaluated over 0-16 years of schooling; the intercept and slope are the
# values reported by the schooling regression summary.
X = np.linspace(0, 16, num=50)
Y = 52.2771 + 2.1723 * X

plt.scatter(School1, Life1, alpha=0.3)
plt.plot(X, Y, "r")

plt.title("Relationship Between Schooling and Life Expectancy")
plt.xlabel("Years of Schooling")
plt.ylabel("Life Expectancy")
plt.show()

The Relationship Between Life Expectancy and HIV Rates in Developed Countries¶

In [39]:
# Split the rows by economy status using the two indicator columns (value 1 = member).
Developed = LifeExpectancy.loc[LifeExpectancy["Economy_status_Developed"].eq(1)]
Developing = LifeExpectancy.loc[LifeExpectancy["Economy_status_Developing"].eq(1)]
In [41]:
# HIV incidence (x) and life expectancy (y) for each economy group.
# NOTE: y_1 and y_2 rebind names first assigned in the regional trend cell, so
# from here on they no longer hold the regional yearly means.
x_1, y_1 = Developed["Incidents_HIV"], Developed["Life_expectancy"]
x_2, y_2 = Developing["Incidents_HIV"], Developing["Life_expectancy"]
In [43]:
# Straight line drawn over the developed-country scatter.
# NOTE(review): the intercept and slope are hard-coded and the regression that
# produced them is not shown in this notebook — confirm their source.
HIV = np.linspace(0, 0.3, num=50)
DLE = 78.3592 + 1.9529 * HIV

plt.scatter(x_1, y_1)
plt.title("Life Expectancy and HIV Rates in Developed Countries")
plt.xlabel("Percentage of Population with HIV")
plt.ylabel("Life Expectancy")
plt.plot(HIV, DLE, "r")
Out[43]:
[<matplotlib.lines.Line2D at 0x1519e1b50>]

The Relationship Between Life Expectancy and HIV Rates in Developing Countries¶

In [46]:
# Straight line drawn over the developing-country scatter.
# NOTE(review): the intercept and slope are hard-coded and the regression that
# produced them is not shown in this notebook — confirm their source.
HIV = np.linspace(0, 22, num=50)
ULE = 68.4233 - 1.8791 * HIV

plt.scatter(x_2, y_2)
plt.title("Life Expectancy and HIV Rate in Developing Countries")
plt.xlabel("Percentage of Population with HIV")
plt.ylabel("Life Expectancy")
plt.plot(HIV, ULE, "r")
Out[46]:
[<matplotlib.lines.Line2D at 0x151aa1f10>]

The Predicted Life Expectancy for 2016 to 2026¶

In [ ]:
def _mean_yearly_change(yearly_means, years):
    """Return the mean of the successive differences of `yearly_means` per unit of `years`."""
    return np.mean(np.diff(yearly_means) / np.diff(years))


# Mean year-over-year change in average life expectancy for each region and the world.
# The series are read straight from the yearly-mean frames rather than from
# y_1..y_5: by this point in a top-to-bottom run, y_1 and y_2 have been rebound
# to the Developed/Developing per-row columns, so pairing them with the yearly
# axis would use the wrong data and normally fail on a length mismatch.
_years = NA["Year"].to_numpy()

Average_Slope_America = _mean_yearly_change(NA["Life_expectancy"].to_numpy(), _years)
Average_Slope_Europe = _mean_yearly_change(EU["Life_expectancy"].to_numpy(), _years)
Average_Slope_Asia = _mean_yearly_change(AS["Life_expectancy"].to_numpy(), _years)
Average_Slope_Africa = _mean_yearly_change(AF["Life_expectancy"].to_numpy(), _years)
Average_Slope_World = _mean_yearly_change(AVG_LE["Life_expectancy"].to_numpy(), _years)

print(Average_Slope_America, Average_Slope_Europe, Average_Slope_Asia,
      Average_Slope_Africa, Average_Slope_World)
In [ ]:
# Regression of life expectancy on Year plus one indicator column per country
# (the first country is dropped and serves as the baseline).
# dtype=float is required on pandas >= 2.0, where get_dummies returns boolean
# columns by default: mixed with the integer Year and float const columns they
# turn the design matrix into object dtype, which statsmodels refuses to fit.
Dummy = pd.get_dummies(LifeExpectancy["Country"], drop_first=True, dtype=float)
LE = LifeExpectancy["Life_expectancy"]
# Design matrix: country indicators, Year, then an added intercept column.
YR = Dummy.join(LifeExpectancy["Year"])
constant = sm.add_constant(YR)
sm.OLS(LE, constant).fit().summary()
In [49]:
# Pooled OLS of life expectancy on calendar year, all countries together.
Y = LifeExpectancy["Life_expectancy"]
X = LifeExpectancy["Year"]
# Design matrix: the Year column plus an intercept column.
const = sm.add_constant(X)
sm.OLS(endog=Y, exog=const).fit().summary()
Out[49]:
OLS Regression Results
Dep. Variable: Life_expectancy R-squared: 0.030
Model: OLS Adj. R-squared: 0.030
Method: Least Squares F-statistic: 89.74
Date: Sat, 27 Jul 2024 Prob (F-statistic): 5.51e-21
Time: 20:55:03 Log-Likelihood: -10438.
No. Observations: 2864 AIC: 2.088e+04
Df Residuals: 2862 BIC: 2.089e+04
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const -645.1984 75.379 -8.559 0.000 -793.001 -497.396
Year 0.3557 0.038 9.473 0.000 0.282 0.429
Omnibus: 226.336 Durbin-Watson: 1.997
Prob(Omnibus): 0.000 Jarque-Bera (JB): 262.456
Skew: -0.718 Prob(JB): 1.02e-57
Kurtosis: 2.628 Cond. No. 8.74e+05


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 8.74e+05. This might indicate that there are
strong multicollinearity or other numerical problems.
In [51]:
# Extrapolate the linear year trend over 2016-2026 for three countries and the world.
Year = np.linspace(2016, 2026, num=50)
trend = 0.3557 * Year  # yearly slope, as reported by the pooled Year regression

# World line uses the pooled-regression intercept.
LE_World = -645.1984 + trend
# NOTE(review): the -654.3982 intercept and the per-country offsets (18.2437,
# -13.1750, 22.8000) are hard-coded and appear in no displayed output;
# presumably they come from the country-dummy regression — confirm.
LE_USA = -654.3982 + 18.2437 + trend
LE_CAR = -654.3982 + trend - 13.1750
LE_JAP = -654.3982 + trend + 22.8000

# Draw in a fixed order so the colour cycle and legend order stay stable.
for _prediction, _label in (
    (LE_USA, "USA"),
    (LE_World, "World Average"),
    (LE_CAR, "Central African Republic"),
    (LE_JAP, "Japan"),
):
    plt.plot(Year, _prediction, label=_label)

plt.title("Predicted Life Expectancy for 2016-2026")
plt.xlabel("Years")
plt.ylabel("Life Expectancy")

# Legend placed outside the axes, to the right.
plt.legend(loc="lower left", fontsize="8", bbox_to_anchor=(1, 0.5))
plt.show()
In [ ]: